#!/usr/bin/env python
# coding: utf-8

# ### Library Type Breakdown
# This portion of the data analysis focuses on library types to analyze whether certain library types experiences more drastic changes over others. 
# 
# Libraries Type data was extracted from the master Impact_of_COVID_on_Tech_Services_Clean_3.CSV sheet. Library types were originally sorted in Excel to include the dominant library types (Academic, Public, K-12, Special, Other, and Blanks), which may have included sub-categories, such as Academic libraries housing special collection or other library types within their institution or physical building. Data was re-sorted on 02/23/2022 to group responses into categories by inclusion of any library type keyword in the breakdown spreadsheets (e.g. A library self-reporting being Academic, Special, and Other would be represented in each of these breakdown spreadsheets to better reflect the cohesive trends happening in these larger institutions impacting their subsideary libraries).  
# 
# Data in staffing numbers columns were standardized to count total number of FT/PT staff and to remove any string text/uncertain values. Rows with NaN data will be removed when analyzing these columns. 

# Links to related Python script for this study: 
# 
# COVID_Impact_TechnicalServices_HolisticAnalysis (a holistic analysis of all survey data): https://doi.org/10.7910/DVN/SXMSDZ
# COVID_Impact_TechnicalServices_LibraryTypeAnalysis (a clustered analysis of impact by library type): https://doi.org/10.7910/DVN/SXMSDZ

# In[49]:


#Setup Numpy and Pandas
import pandas as pd
import numpy as np

#Import Matplotlib
import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')


# In[50]:


#Academic Libraries

#Import File
al=pd.read_csv('Impact_of_COVID_on_Tech_Services_Academic.csv')

#Data Analysis
print(al.head()) #Column names, data samples
print(al.tail()) #Tail sample, 261 rows
al.describe() #5 rows with 38 columns
type(al)


# In[51]:


#Question 5 Value Counts
#Which areas is your technical services team comprised of? If you do not work in a technical services team or unit (e.g. a solo librarian), which technical tasks do you oversee?
#Please select all that apply.

#Convert column type to string
al['Q5'] = al['Q5'].astype('str') 

#Tallied areas of technical services teams
al_areas=al['Q5'].str.split(',', expand=True).stack().value_counts().to_frame()
print(al_areas)


# In[82]:


academic_df={'Function':['Cataloging', 'Processing', 
                        'Electronic Resources', 'Acquisitions',
                        'Library Systems', 'Collection Development',
                        'Archives and/or Special Collections', 
                        'Federal Depository Library Program', 
                        'Interlibrary Loan', 'Other'], 'Number of Units Including Functions': [245, 207, 183, 179, 112, 94, 67, 53, 51, 23]}

x=academic_df['Function']
y=academic_df['Number of Units Including Functions']
labels=['Cataloging', 'Processing', 'Electronic Resources', 
        'Acquisitions','Library Systems', 'Collection Development', 
        'Archives and/or Special Collections', 
        'Federal Depository Library Program', 
        'Interlibrary Loan', 'Other']

plt.pie(y, labels=labels, autopct='%1.1f%%')
plt.title('Technical Services Participation by Academic Libraries')
# plt.legend()
plt.show()

plt.savefig('Academic_Services.png')


# In[53]:


#Staffing Numbers Before and After
al_staffing=al[['Q6','Q7']]
al_staffing.dropna()


# In[54]:


#Average
print(al_staffing['Q6'].mean())
print(al_staffing['Q7'].mean())

# Before: 7.5
# After: 6.6
# Percent Change: 12% Decrease

#Sum
print(al_staffing['Q6'].sum())
print(al_staffing['Q7'].sum())

# Before: 1939.75
# After: 1685.75
# Difference: 254 positions lost
# Percent Change: 13.1% Decrease


# In[55]:


#Public Libraries

#Import File
pub=pd.read_csv('Impact_of_COVID_on_Tech_Services_Public.csv')

#Data Analysis
print(pub.head()) #Column names, data samples
print(pub.tail()) #Tail sample, 238 rows
pub.describe() #5 rows with 38 columns
type(pub)


# In[56]:


#Question 5 Value Counts
#Which areas is your technical services team comprised of? If you do not work in a technical services team or unit (e.g. a solo librarian), which technical tasks do you oversee?
#Please select all that apply.

#Convert column type to string
pub['Q5'] = pub['Q5'].astype('str') 

#Tallied areas of technical services teams
pub_areas=pub['Q5'].str.split(',', expand=True).stack().value_counts().to_frame()
print(pub_areas)


# In[109]:


public_df={'Function':['Cataloging', 'Processing', 
                        'Acquisitions', 'Collection Development',
                        'Electronic Resources', 'Interlibrary Loan',
                        'Library Systems', 
                        'Archives and/or Special Collections', 
                        'Other', 'Federal Depository Library Program'], 'Number of Units Including Functions': [230, 218, 182, 123, 106, 95, 71, 49, 17, 5]}

x=public_df['Function']
y=public_df['Number of Units Including Functions']
labels=['Cataloging', 'Processing', 
        'Acquisitions', 'Collection Development',
        'Electronic Resources', 'Interlibrary Loan',
        'Library Systems', 'Archives and/or Special Collections', 
        'Other', 'Federal Depository Library Program']

plt.pie(y, labels=labels, autopct='%1.1f%%', pctdistance=0.85, textprops={'fontsize': 8})
plt.title('Technical Services Participation by Public Libraries')
# plt.legend()
plt.show()

plt.savefig('Public_Services.png')


# In[58]:


#Staffing Numbers Before and After
pub_staffing=pub[['Q6','Q7']]
pub_staffing.dropna()


# In[59]:


#Average
print(pub_staffing['Q6'].mean())
print(pub_staffing['Q7'].mean())

# Before: 7.0
# After: 6.6
# Percent Change: 5.71% Decrease

#Sum
print(pub_staffing['Q6'].sum())
print(pub_staffing['Q7'].sum())

# Before: 1626.25
# After: 1516.2
# Difference: 110.05 positions lost
# Percent change: 6.8% decrease


# In[60]:


#K-12

#Import File
k12=pd.read_csv('Impact_of_COVID_on_Tech_Services_K-12.csv')

#Data Analysis
print(k12.head()) #Column names, data samples
print(k12.tail()) #Tail sample, 45 rows
k12.describe() #5 rows with 38 columns
type(k12)


# In[61]:


#Staffing Numbers Before and After
k12_staffing=k12[['Q6','Q7']]
k12_staffing.dropna()


# In[62]:


#Average
print(k12_staffing['Q6'].mean())
print(k12_staffing['Q7'].mean())

# Before: 1.95
# After: 1.53
# Percent Change: 21.54% Decrease

#Sum
print(k12_staffing['Q6'].sum())
print(k12_staffing['Q7'].sum())

# Before: 86
# After: 66
# Difference: 20 positions lost
# Percent Change: 23.3% Decrease


# In[63]:


#Drop Index 6 as Extreme Outlier
k12_staffing=k12_staffing.drop(index=6)


# In[64]:


#Average
print(k12_staffing['Q6'].mean())
print(k12_staffing['Q7'].mean())

# Before: 1.49
# After: 1.52
# Percent Change: 2.01% Increase

#Sum
print(k12_staffing['Q6'].sum())
print(k12_staffing['Q7'].sum())

# Before: 64
# After: 64
# Difference: 0 positions lost
# Percent Change: 0%


# In[65]:


#Question 5 Value Counts
#Which areas is your technical services team comprised of? If you do not work in a technical services team or unit (e.g. a solo librarian), which technical tasks do you oversee?
#Please select all that apply.

#Convert column type to string
k12['Q5'] = k12['Q5'].astype('str') 

#Tallied areas of technical services teams
k12_areas=k12['Q5'].str.split(',', expand=True).stack().value_counts().to_frame()
print(k12_areas)


# In[98]:


k12_df={'Function':['Cataloging', 'Processing',
                    'Acquisitions', 'Collection Development',
                    'Electronic Resources', 'Library Systems',
                    'Interlibrary Loan'
                    'Archives and/or Special Collections', 
                    'Other'], 'Number of Units Including Functions': [38, 37, 32, 31, 31, 22, 20, 9, 7]}

x=k12_df['Function']
y=k12_df['Number of Units Including Functions']
labels=['Cataloging', 'Processing',
        'Acquisitions', 'Collection Development',
        'Electronic Resources', 'Library Systems',
        'Interlibrary Loan',
        'Archives and/or Special Collections', 
        'Other']

plt.pie(y, labels=labels, autopct='%1.1f%%')
plt.title('Technical Services Participation by K-12 School Libraries')
# plt.legend()
plt.show()

plt.savefig('K12_Services.png')


# In[67]:


#Special Collections And/Or Archives

#Import File
sca=pd.read_csv('Impact_of_COVID_on_Tech_Services_Special_Collections_Archives.csv')

#Data Analysis
print(sca.head()) #Column names, data samples
print(sca.tail()) #Tail sample, 34 rows
sca.describe() #5 rows with 38 columns
type(sca)


# In[68]:


#Staffing Numbers Before and After
sca_staffing=sca[['Q6','Q7']]
sca_staffing.dropna()


# In[69]:


#Average
print(sca_staffing['Q6'].mean())
print(sca_staffing['Q7'].mean())

# Before: 3.67
# After: 3.68
# Percent Change: 0.27% Increase

#Sum
print(sca_staffing['Q6'].sum())
print(sca_staffing['Q7'].sum())

# Before: 88
# After: 92
# Difference: 4 positions gained

# Post Inclusion of Academic Libraries in Data Restructure
# Before: 4.12
# After: 3.97
# Percent Change: 3.97% Decrease

# Positions Lost
# Before: 136.0
# After: 131.0
# Difference: 5 positions lost


# In[70]:


#Question 5 Value Counts
#Which areas is your technical services team comprised of? If you do not work in a technical services team or unit (e.g. a solo librarian), which technical tasks do you oversee?
#Please select all that apply.

#Convert column type to string
sca['Q5'] = sca['Q5'].astype('str') 

#Tallied areas of technical services teams
sca_areas=sca['Q5'].str.split(',', expand=True).stack().value_counts().to_frame()
print(sca_areas)


# In[99]:


sca_df={'Function':['Cataloging', 'Processing', 'Archives and/or Special Collections',
                    'Electronic Resources', 'Acquisitions', 'Collection Development', 
                    'Library Systems','Federal Depository Library Program',
                    'Other', 'Interlibrary Loan'], 
        'Number of Units Including Functions': [30, 24, 22, 15, 14, 13, 13, 5, 5, 5]}

x=sca_df['Function']
y=sca_df['Number of Units Including Functions']
labels=['Cataloging', 'Processing', 'Archives and/or Special Collections',
        'Electronic Resources', 'Acquisitions', 'Library Systems',
        'Collection Development', 'Federal Depository Library Program',
        'Other', 'Interlibrary Loan']

plt.pie(y, labels=labels, autopct='%1.1f%%')
plt.title('Technical Services Participation by Special Collections/Archives')
# plt.legend()
plt.show()

plt.savefig('SC_Archives_Services.png')


# In[72]:


#Other

#Import File
other=pd.read_csv('Impact_of_COVID_on_Tech_Services_Other.csv')

#Data Analysis
print(other.head()) #Column names, data samples
print(other.tail()) #Tail sample, 49 rows
other.describe() #5 rows with 38 columns
type(other)


# In[73]:


#Staffing Numbers Before and After
other_staffing=other[['Q6','Q7']]
other_staffing.dropna()


# In[74]:


#Average
print(other_staffing['Q6'].mean())
print(other_staffing['Q7'].mean())

# Before: 4.90
# After: 4.41
# Percent Change: 8.84% Decrease

#Sum
print(other_staffing['Q6'].sum())
print(other_staffing['Q7'].sum())

# Before: 199.6
# After: 181.8
# Difference: 17.8 positions lost

# Post Data Restructure
# Before: 4.90
# After: 4.41
# Percent Change: 10% Decrease

# Positions Lost
# Before: 225.6
# After: 202.8
# Difference: 22.8 positions lost


# In[75]:


#Question 5 Value Counts
#Which areas is your technical services team comprised of? If you do not work in a technical services team or unit (e.g. a solo librarian), which technical tasks do you oversee?
#Please select all that apply.

#Convert column type to string
other['Q5'] = other['Q5'].astype('str') 

#Tallied areas of technical services teams
other_areas=other['Q5'].str.split(',', expand=True).stack().value_counts().to_frame()
print(other_areas)


# In[100]:


#NaN Value Excluded

other_df={'Function':['Cataloging', 'Processing', 'Acquisitions',
                    'Electronic Resources', 'Library Systems', 'Collection Development',
                    'Interlibrary Loan', 'Archives and/or Special Collections',
                    'Federal Depository Library Program', 'Other'],
        'Number of Units Including Functions': [44, 37, 29, 27, 22, 21, 19, 12, 11, 7]}

x=other_df['Function']
y=other_df['Number of Units Including Functions']
labels=['Cataloging', 'Processing', 'Acquisitions',
        'Electronic Resources', 'Library Systems',
        'Collection Development','Interlibrary Loan', 
        'Archives and/or Special Collections',
        'Federal Depository Library Program', 'Other']

plt.pie(y, labels=labels, autopct='%1.1f%%')
plt.title('Technical Services Participation by Other Libraries')
# plt.legend()
plt.show()

plt.savefig('Other_Services.png')


# In[77]:


#Blanks

#Import File
blanks=pd.read_csv('Impact_of_COVID_on_Tech_Services_Blank_Types.csv')

#Data Analysis
print(blanks.head()) #Column names, data samples
print(blanks.tail()) #Tail sample, 46 rows
blanks.describe() #5 rows with 38 columns
type(blanks)


# In[78]:


#Staffing Numbers Before and After
blanks_staffing=blanks[['Q6','Q7']]
blanks_staffing.dropna()


# In[79]:


#Average
print(blanks_staffing['Q6'].mean())
print(blanks_staffing['Q7'].mean())

# Before: 4.23
# After: 3.76
# Percent Change: 11.11% Decrease

#Sum
print(blanks_staffing['Q6'].sum())
print(blanks_staffing['Q7'].sum())

# Before: 162.5
# After: 143.0
# Difference: 19.5 positions lost

# Post Data Restructure
# Before: 3.96
# After: 3.43
# Percent Change: 13.39% Decrease

# Positions Lost
# Before: 146.5
# After: 127.0
# Difference: 19.5 positions lost


# In[81]:


#Question 5 Value Counts
#Which areas is your technical services team comprised of? If you do not work in a technical services team or unit (e.g. a solo librarian), which technical tasks do you oversee?
#Please select all that apply.

#Convert column type to string
blanks['Q5'] = blanks['Q5'].astype('str') 

#Tallied areas of technical services teams
blanks_areas=blanks['Q5'].str.split(',', expand=True).stack().value_counts().to_frame()
print(blanks_areas)


# In[101]:


#NaN Value Excluded

blanks_df={'Function':['Cataloging', 'Processing', 'Acquisitions',
                       'Electronic Resources', 'Collection Development',
                       'Library Systems', 'Interlibrary Loan', 'Archives and/or Special Collections',
                       'Other', 'Federal Depository Library Program'],
           'Number of Units Including Functions': [39, 34, 28, 26, 24, 21, 13, 8, 6, 3]}

x=blanks_df['Function']
y=blanks_df['Number of Units Including Functions']
labels=['Cataloging', 'Processing', 'Acquisitions',
        'Electronic Resources', 'Collection Development',
        'Library Systems', 'Interlibrary Loan', 'Archives and/or Special Collections',
        'Other', 'Federal Depository Library Program']

plt.pie(y, labels=labels, autopct='%1.1f%%')
plt.title('Technical Services Participation by Blank Response Libraries')
# plt.legend()
plt.show()

plt.savefig('Blanks_Services.png')

